## R Script Outputs ------------------------------------------------------------
# Figure 7: The Effect of Immigration Lobbying in 2017
# Appendix Table D.6: DiD Regression Results: 1992--2017
# Appendix Table D.7: DiD Regression Results: Augmented Lobbying Measures
# Appendix Table D.8: DiD Regression Results: 2016--2017
# Appendix Table D.11: DiD Regression Results: Two-Way Clustered Standard Errors
# Appendix Table D.12: DiD Regression Results: Excluding Large Outlier Firms
# Appendix Table D.13: DiD Regression Results: Controlling for Size
# Appendix Table D.14: DiD Regression Results: Time Placebos
# Appendix Table D.15: DiD Regression Results: Placebo Treatments


## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Run entire script


## setup -----------------------------------------------------------------------
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse",
         "lfe", 
         "stargazer",
         "xtable")

lapply(pkg, require, character.only = TRUE)

# set main directory
MAIN_DIR <- "~/Dropbox/Research/JOP-h1b-replication"


## load data -------------------------------------------------------------------
load(file = paste(MAIN_DIR, "data-merge-91-17-orbis-excluded.RData", sep = "/"))

df <- data.merge

# pre-treatment sample
df.pre <- df %>%
  filter(year < 2017) 

# two period sample
df.16.17 <- df %>% 
  filter(year == 2016 | year == 2017)


## create variable name concordance for tables ---------------------------------
var.df <- tibble(var = c(
  "h1b_deny_rate",
  
  "lob_img_2016",
  "lob_img_2017",
  "lob_text_hs_h1b_visa_2017",
  "lob_text_hs_h1b_visa_uscis_2017",
  "lob_text_hs_h1b_visa_dol_2017",
  "lob_text_hs_h1b_visa_dhs_2017",
  "lob_text_hs_h1b_visa_wh_eop_2017",
  "lob_text_hs_h1b_visa_only_congress_2017",
  "lob_only_tob_2017",
  "lob_only_bev_2017",
  "lob_only_cdt_2017",
  
  "lob_img_2017:post_2017",
  "lob_text_hs_h1b_visa_2017:post_2017",
  "lob_text_hs_h1b_visa_uscis_2017:post_2017",
  "lob_text_hs_h1b_visa_dol_2017:post_2017",
  "lob_text_hs_h1b_visa_dhs_2017:post_2017",
  "lob_text_hs_h1b_visa_wh_eop_2017:post_2017",
  "lob_text_hs_h1b_visa_only_congress_2017:post_2017",
  
  "lob_only_tob_2017:post_2017",
  "lob_only_bev_2017:post_2017",
  "lob_only_cdt_2017:post_2017",
  
  "sizeSmall",
  "sizeMedium",
  "sizeLarge",
  "sizeVery Large",
  "sizeVery_Large",
  "public",
  
  "lob_img_2017:post_2004",
  "post_2017:lob_img_2016",
  "lob_img_2017:year_1992",
  "lob_img_2017:year_1993",
  "lob_img_2017:year_1994",
  "lob_img_2017:year_1995",
  "lob_img_2017:year_1996",
  "lob_img_2017:year_1997",
  "lob_img_2017:year_1998",
  "lob_img_2017:year_1999",
  "lob_img_2017:year_2000",
  "lob_img_2017:year_2001",
  "lob_img_2017:year_2002",
  "lob_img_2017:year_2003",
  "lob_img_2017:year_2004",
  "lob_img_2017:year_2005",
  "lob_img_2017:year_2006",
  "lob_img_2017:year_2007",
  "lob_img_2017:year_2008",
  "lob_img_2017:year_2009",
  "lob_img_2017:year_2010",
  "lob_img_2017:year_2011",
  "lob_img_2017:year_2012",
  "lob_img_2017:year_2013",
  "lob_img_2017:year_2014",
  "lob_img_2017:year_2015"),
  var_name = c(
    "H-1B Denial Rate",
    
    "2016 IMM Lobbying (any)",
    "2017 IMM Lobbying (any)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'')",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets USCIS)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DOL)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DHS)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets WH/EOP)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets Only Congress)",
    "2017 Tobacco Lobbying Only",
    "2017 Beverage Lobbying Only",
    "2017 Commodities Lobbying Only",
    
    "2017 IMM Lobbying (any) x Trump Administration (2017)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'') x Trump Administration (2017)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets USCIS) x Trump Administration (2017)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DOL) x Trump Administration (2017)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DHS) x Trump Administration (2017)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets WH/EOP) x Trump Administration (2017)",
    "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets Only Congress) x Trump Administration (2017)",
    
    "Tobacco Lobbying Only in 2017 x Trump Administration (2017)",
    "Beverage Lobbying Only in 2017 x Trump Administration (2017)",
    "Commodities Lobbying Only in 2017 x Trump Administration (2017)",
    
    "Size: Small",
    "Size: Medium",
    "Size: Large",
    "Size: Very Large",
    "Size: Very Large",
    "Public Firm",
    
    "2017 IMM Lobbying (any) x Placebo Timing (2004)",
    "2016 IMM Lobbying (any) x Trump Administration (2017)",
    "2017 IMM Lobbying (any) x Placebo Timing (1992)",
    "2017 IMM Lobbying (any) x Placebo Timing (1993)",
    "2017 IMM Lobbying (any) x Placebo Timing (1994)",
    "2017 IMM Lobbying (any) x Placebo Timing (1995)",
    "2017 IMM Lobbying (any) x Placebo Timing (1996)",
    "2017 IMM Lobbying (any) x Placebo Timing (1997)",
    "2017 IMM Lobbying (any) x Placebo Timing (1998)",
    "2017 IMM Lobbying (any) x Placebo Timing (1999)",
    "2017 IMM Lobbying (any) x Placebo Timing (2000)",
    "2017 IMM Lobbying (any) x Placebo Timing (2001)",
    "2017 IMM Lobbying (any) x Placebo Timing (2002)",
    "2017 IMM Lobbying (any) x Placebo Timing (2003)",
    "2017 IMM Lobbying (any) x Placebo Timing (2004)",
    "2017 IMM Lobbying (any) x Placebo Timing (2005)",
    "2017 IMM Lobbying (any) x Placebo Timing (2006)",
    "2017 IMM Lobbying (any) x Placebo Timing (2007)",
    "2017 IMM Lobbying (any) x Placebo Timing (2008)",
    "2017 IMM Lobbying (any) x Placebo Timing (2009)",
    "2017 IMM Lobbying (any) x Placebo Timing (2010)",
    "2017 IMM Lobbying (any) x Placebo Timing (2011)",
    "2017 IMM Lobbying (any) x Placebo Timing (2012)",
    "2017 IMM Lobbying (any) x Placebo Timing (2013)",
    "2017 IMM Lobbying (any) x Placebo Timing (2014)",
    "2017 IMM Lobbying (any) x Placebo Timing (2015)")
)


## function to replace variable names ------------------------------------------
replaceVarName <- function(var.vec, var.df){
  # Prepare output vector
  out.vec <- rep(NA, length(var.vec))
  matches <- match(var.vec, var.df$var)
  out.vec <- var.df[matches,]$var_name
  
  if(any(is.na(out.vec))){
    warning(paste("Variable concordence missing: ", 
                  paste(var.vec[is.na(out.vec)], collapse = ", "), 
                  sep = ""))
  } else{
    
  }
  
  return(out.vec)
}


## Appendix Table D.6: DiD Regression Results: 1992–2017 -----------------------
# Column 1
did.h1b.lob.1 <- felm(h1b_deny_rate ~ 
                        lob_img_2017:post_2017
                      | bvd_id + year | 0 | bvd_id, 
                      keepX = TRUE,
                      data = df)
summary(did.h1b.lob.1)

# DiD estimate
coef(did.h1b.lob.1)["lob_img_2017:post_2017"] * 100

# compare effect size to variation in the outcome
round(abs((coef(did.h1b.lob.1)["lob_img_2017:post_2017"])/sd(did.h1b.lob.1$response, na.rm = TRUE)), 2) 

# compare effect size to mean denial rate
round(abs((coef(did.h1b.lob.1)["lob_img_2017:post_2017"])/mean(did.h1b.lob.1$response, na.rm = TRUE)), 2) 

# Column 2
did.h1b.lob.2.ave <- felm(h1b_deny_rate ~
                            lob_text_hs_h1b_visa_2017:post_2017
                          | bvd_id + year | 0 | bvd_id,
                          keepX = TRUE,
                          data = df)
summary(did.h1b.lob.2.ave)

# Column 3
did.h1b.lob.2.congress <- felm(h1b_deny_rate ~
                                 lob_text_hs_h1b_visa_only_congress_2017:post_2017 
                               | bvd_id + year | 0 | bvd_id,
                               keepX = TRUE,
                               data = df)
summary(did.h1b.lob.2.congress)

# Column 4
did.h1b.lob.2.uscis <- felm(h1b_deny_rate ~
                              lob_text_hs_h1b_visa_uscis_2017:post_2017
                            | bvd_id + year | 0 | bvd_id,
                            keepX = TRUE,
                            data = df)
summary(did.h1b.lob.2.uscis)

# Column 5
did.h1b.lob.2.dol <- felm(h1b_deny_rate ~
                            lob_text_hs_h1b_visa_dol_2017:post_2017
                          | bvd_id + year | 0 | bvd_id,
                          keepX = TRUE,
                          data = df)
summary(did.h1b.lob.2.dol)

# Column 6
did.h1b.lob.2.dhs <- felm(h1b_deny_rate ~
                            lob_text_hs_h1b_visa_dhs_2017:post_2017
                          | bvd_id + year | 0 | bvd_id,
                          keepX = TRUE,
                          data = df)
summary(did.h1b.lob.2.dhs)

# Column 7
did.h1b.lob.2.wh <- felm(h1b_deny_rate ~
                           lob_text_hs_h1b_visa_wh_eop_2017:post_2017
                         | bvd_id + year | 0 | bvd_id,
                         keepX = TRUE,
                         data = df)
summary(did.h1b.lob.2.wh)


# collect results
models.main <- list(did.h1b.lob.1,
                    did.h1b.lob.2.ave, 
                    did.h1b.lob.2.congress,
                    did.h1b.lob.2.uscis, 
                    did.h1b.lob.2.dol, 
                    did.h1b.lob.2.dhs,
                    did.h1b.lob.2.wh)

# extract clustered SEs
cse.main <- list(did.h1b.lob.1$cse,
                 did.h1b.lob.2.ave$cse,
                 did.h1b.lob.2.congress$cse,
                 did.h1b.lob.2.uscis$cse,
                 did.h1b.lob.2.dol$cse,
                 did.h1b.lob.2.dhs$cse,
                 did.h1b.lob.2.wh$cse)

# set var order
var.order.main <- c("^lob_img_2017:post_2017$",
                    "^lob_text_hs_h1b_visa_2017:post_2017$",
                    "^lob_text_hs_h1b_visa_only_congress_2017:post_2017$",
                    "^lob_text_hs_h1b_visa_uscis_2017:post_2017$",
                    "^lob_text_hs_h1b_visa_dol_2017:post_2017$",
                    "^lob_text_hs_h1b_visa_dhs_2017:post_2017$",
                    "^lob_text_hs_h1b_visa_wh_eop_2017:post_2017$")

# set var label
var.label.main <- str_replace_all(var.order.main, "\\^", "")
var.label.main <- str_replace_all(var.label.main, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D6.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D6.tex"))
stargazer(models.main,
          se = cse.main,
          type = "text",
          #type = "latex",
          label = "tb:main",
          font.size = "tiny",
          single.row = FALSE,
          title = "DiD Regression Results: 1992--2017",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.main,
          covariate.labels = replaceVarName(var.vec = var.label.main,
                                            var.df = var.df),
          #column.labels = rep("Scale", 4),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 7)),
                           c("Fixed Effects: Year",
                             rep("Yes", 7)),
                           c("Group Size: Firms",
                             formatC(length(levels(did.h1b.lob.1$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.ave$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.congress$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.uscis$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dol$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dhs$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.wh$fe$bvd_id)), big.mark = ",")
                           ),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.lob.1$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.ave$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.congress$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.uscis$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dol$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dhs$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.wh$fe$year)), big.mark = ",")
                           )),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed-effects subsume the constitutive terms of the interactions. Column (1) estimates the effect of immigration lobbying in 2017 ignoring specific lobbying issues and target venues. Column (2) estimates the effect when only considering 2017 immigration lobbying that specifies ``Skilled''/``H-1B''/``Visa'' in reports. Column (3)--(7) further restricts the treatment condition to only firms that targeted specific government agencies."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Figure 7: The Effect of Immigration Lobbying in 2017 ------------------------
# extract coefs and compute CIs
coef.df.main <- tibble(model = c("Baseline",
                                 "Text",
                                 "Text & USCIS",
                                 "Text & DOL",
                                 "Text & DHS",
                                 "Text & WH/EOP",
                                 "Text & Congress"),
coef = c(coef(did.h1b.lob.1)["lob_img_2017:post_2017"],
         coef(did.h1b.lob.2.ave)["lob_text_hs_h1b_visa_2017:post_2017"],
         coef(did.h1b.lob.2.uscis)["lob_text_hs_h1b_visa_uscis_2017:post_2017"],
         coef(did.h1b.lob.2.dol)["lob_text_hs_h1b_visa_dol_2017:post_2017"],
         coef(did.h1b.lob.2.dhs)["lob_text_hs_h1b_visa_dhs_2017:post_2017"],
         coef(did.h1b.lob.2.wh)["lob_text_hs_h1b_visa_wh_eop_2017:post_2017"],
         coef(did.h1b.lob.2.congress)["lob_text_hs_h1b_visa_only_congress_2017:post_2017"]),
lwr = c(confint(did.h1b.lob.1)["lob_img_2017:post_2017", 1],
        confint(did.h1b.lob.2.ave)["lob_text_hs_h1b_visa_2017:post_2017", 1],
        confint(did.h1b.lob.2.uscis)["lob_text_hs_h1b_visa_uscis_2017:post_2017", 1],
        confint(did.h1b.lob.2.dol)["lob_text_hs_h1b_visa_dol_2017:post_2017", 1],
        confint(did.h1b.lob.2.dhs)["lob_text_hs_h1b_visa_dhs_2017:post_2017", 1],
        confint(did.h1b.lob.2.wh)["lob_text_hs_h1b_visa_wh_eop_2017:post_2017", 1],
        confint(did.h1b.lob.2.congress)["lob_text_hs_h1b_visa_only_congress_2017:post_2017", 1]),
upr = c(confint(did.h1b.lob.1)["lob_img_2017:post_2017", 2],
        confint(did.h1b.lob.2.ave)["lob_text_hs_h1b_visa_2017:post_2017", 2],
        confint(did.h1b.lob.2.uscis)["lob_text_hs_h1b_visa_uscis_2017:post_2017", 2],
        confint(did.h1b.lob.2.dol)["lob_text_hs_h1b_visa_dol_2017:post_2017", 2],
        confint(did.h1b.lob.2.dhs)["lob_text_hs_h1b_visa_dhs_2017:post_2017", 2],
        confint(did.h1b.lob.2.wh)["lob_text_hs_h1b_visa_wh_eop_2017:post_2017", 2],
        confint(did.h1b.lob.2.congress)["lob_text_hs_h1b_visa_only_congress_2017:post_2017", 2])) %>%
  mutate(model = factor(model, 
                        levels = c("Baseline",
                                   "Text",
                                   "Text & USCIS",
                                   "Text & DOL",
                                   "Text & DHS",
                                   "Text & WH/EOP",
                                   "Text & Congress")))

# df for annotations
txt.shift.main <- 0.005
txt.pos.main <- tibble(model = coef.df.main$model,
                       coef = c(coef.df.main[1,]$lwr - txt.shift.main,
                                coef.df.main[2,]$lwr - txt.shift.main,
                                coef.df.main[3,]$lwr - 1.5 * txt.shift.main,
                                coef.df.main[4,]$lwr - 1.5 * txt.shift.main,
                                coef.df.main[5,]$lwr - 1.5 * txt.shift.main,
                                coef.df.main[6,]$lwr - 1.5 * txt.shift.main,
                                coef.df.main[7,]$lwr - 1.5 * txt.shift.main),
                       lwr = coef.df.main$lwr,
                       upr = coef.df.main$upr)

# plot
axis.title.size <- 16

p.coef.main <- ggplot(data = coef.df.main,
                      aes(x = model, y = coef,
                          ymin = lwr, 
                          ymax = upr)) +
  geom_pointrange(size = 1) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  geom_text(data = txt.pos.main,
            aes(label = model),
            size = axis.title.size - 12.4) +
  scale_x_discrete("") +
  scale_y_continuous("\nDiD Effect of 2017 Immigration Lobbying",
                     breaks = c(-0.09, -0.06, -0.03, 0, 0.03),
                     labels = c(-0.09, -0.06, -0.03, 0, 0.03),
                     limits = c(-0.09, 0.03)) +
  theme_bw() +
  theme(legend.position = "none",
        plot.title = element_text(size = axis.title.size + 4,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        plot.margin = margin(0.1, 0.5, 0.1, 0.1, "cm"),
        axis.title.y = element_text(size = axis.title.size,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        strip.background = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text = element_text(size = axis.title.size + 4,
                                  margin = margin(20, 0, 20, 0)),
        panel.grid = element_blank()) 

# save
pdf(file = paste(MAIN_DIR, "Figure-7.pdf", sep = "/"),
    width = 8.8, height = 4.2)
print(p.coef.main)
dev.off()


## Appendix Table D.7: DiD Regression Results: Augmented Lobbying Measures -----
# Column 1
did.h1b.lob.1.aug <- felm(h1b_deny_rate ~ 
                            lob_img_2017_aug:post_2017
                          | bvd_id + year | 0 | bvd_id, 
                          keepX = TRUE,
                          data = df)
summary(did.h1b.lob.1.aug)  

# Column 2
did.h1b.lob.2.aug <- felm(h1b_deny_rate ~
                            lob_text_hs_h1b_visa_2017_aug:post_2017
                          | bvd_id + year | 0 | bvd_id,
                          keepX = TRUE,
                          data = df)
summary(did.h1b.lob.2.aug)

# Column 3
did.h1b.lob.2.congress.aug <- felm(h1b_deny_rate ~
                                     lob_text_hs_h1b_visa_only_congress_2017_aug:post_2017 
                                   | bvd_id + year | 0 | bvd_id,
                                   keepX = TRUE,
                                   data = df)
summary(did.h1b.lob.2.congress.aug)


# Column 4
did.h1b.lob.2.uscis.aug <- felm(h1b_deny_rate ~
                                  lob_text_hs_h1b_visa_uscis_2017_aug:post_2017
                                | bvd_id + year | 0 | bvd_id,
                                keepX = TRUE,
                                data = df)
summary(did.h1b.lob.2.uscis.aug)


# Column 5
did.h1b.lob.2.dol.aug <- felm(h1b_deny_rate ~
                                lob_text_hs_h1b_visa_dol_2017_aug:post_2017
                              | bvd_id + year | 0 | bvd_id,
                              keepX = TRUE,
                              data = df)
summary(did.h1b.lob.2.dol.aug)

# Column 6
did.h1b.lob.2.dhs.aug <- felm(h1b_deny_rate ~
                                lob_text_hs_h1b_visa_dhs_2017_aug:post_2017
                              | bvd_id + year | 0 | bvd_id,
                              keepX = TRUE,
                              data = df)
summary(did.h1b.lob.2.dhs.aug)


# column 7
did.h1b.lob.2.wh.aug <- felm(h1b_deny_rate ~
                               lob_text_hs_h1b_visa_wh_eop_2017_aug:post_2017
                             | bvd_id + year | 0 | bvd_id,
                             keepX = TRUE,
                             data = df)
summary(did.h1b.lob.2.wh.aug)

# collect results
models.aug <- list(did.h1b.lob.1.aug,
                   did.h1b.lob.2.aug, 
                   did.h1b.lob.2.congress.aug,
                   did.h1b.lob.2.uscis.aug,
                   did.h1b.lob.2.dol.aug,
                   did.h1b.lob.2.dhs.aug, 
                   did.h1b.lob.2.wh.aug)

# extract clustered SEs
cse.aug <- list(did.h1b.lob.1.aug$cse,
                did.h1b.lob.2.aug$cse,
                did.h1b.lob.2.congress.aug$cse,
                did.h1b.lob.2.uscis.aug$cse,
                did.h1b.lob.2.dol.aug$cse,
                did.h1b.lob.2.dhs.aug$cse,
                did.h1b.lob.2.wh.aug$cse)

# set var order
var.order.aug <- c("^lob_img_2017:post_2017$",
                   "^lob_text_hs_h1b_visa_2017:post_2017$",
                   "^lob_text_hs_h1b_visa_only_congress_2017:post_2017$",
                   "^lob_text_hs_h1b_visa_uscis_2017:post_2017$",
                   "^lob_text_hs_h1b_visa_dol_2017:post_2017$",
                   "^lob_text_hs_h1b_visa_dhs_2017:post_2017$",
                   "^lob_text_hs_h1b_visa_wh_eop_2017:post_2017$")

# set var label
var.label.aug <- str_replace_all(var.order.aug, "\\^", "")
var.label.aug <- str_replace_all(var.label.aug, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D7.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D7.tex"))
stargazer(models.aug,
          se = cse.aug,
          type = "text",
          #type = "latex",
          label = "tb:aug",
          font.size = "tiny",
          single.row = FALSE,
          title = "DiD Regression Results: Augmented Lobbying Measures",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.aug,
          covariate.labels = replaceVarName(var.vec = var.label.aug,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 7)),
                           c("Fixed Effects: Year",
                             rep("Yes", 7)),
                           c("Group Size: Firms",
                             formatC(length(levels(did.h1b.lob.1.aug$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.aug$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.congress.aug$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.uscis.aug$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dol.aug$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dhs.aug$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.wh.aug$fe$bvd_id)), big.mark = ",")
                           ),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.lob.1.aug$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.aug$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.congress.aug$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.uscis.aug$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dol.aug$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dhs.aug$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.wh.aug$fe$year)), big.mark = ",")
                           )
          ),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed-effects subsume the constitutive terms of the interactions. This table presents using augmented measures of lobbying based on 2017 reports with either IMM as a general issue code or non-IMM reports with immigration-related text (immigration, visa, or H-1B)."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Appendix Table D.8: DiD Regression Results: 2016–2017 -----------------------
# Column 1
did.h1b.lob.2y <- felm(h1b_deny_rate ~ 
                         lob_img_2017:post_2017 
                       | bvd_id + year | 0 | bvd_id, 
                       keepX = TRUE,
                       data = df.16.17)
summary(did.h1b.lob.2y) 

# Column 2
did.h1b.lob.2y.text <- felm(h1b_deny_rate ~
                              lob_text_hs_h1b_visa_2017:post_2017
                            | bvd_id + year | 0 | bvd_id,
                            keepX = TRUE,
                            data = df.16.17)
summary(did.h1b.lob.2y.text)

# Column 3
did.h1b.lob.2y.text.congress <- felm(h1b_deny_rate ~
                                       lob_text_hs_h1b_visa_only_congress_2017:post_2017 
                                     | bvd_id + year | 0 | bvd_id,
                                     keepX = TRUE,
                                     data = df.16.17)
summary(did.h1b.lob.2y.text.congress)

# Column 4
did.h1b.lob.2y.text.uscis <- felm(h1b_deny_rate ~
                                    lob_text_hs_h1b_visa_uscis_2017:post_2017
                                  | bvd_id + year | 0 | bvd_id,
                                  keepX = TRUE,
                                  data = df.16.17)
summary(did.h1b.lob.2y.text.uscis)

# Column 5
did.h1b.lob.2y.text.dol <- felm(h1b_deny_rate ~
                                  lob_text_hs_h1b_visa_dol_2017:post_2017
                                | bvd_id + year | 0 | bvd_id,
                                keepX = TRUE,
                                data = df.16.17)
summary(did.h1b.lob.2y.text.dol)

# Column 6
did.h1b.lob.2y.text.dhs <- felm(h1b_deny_rate ~
                                  lob_text_hs_h1b_visa_dhs_2017:post_2017
                                | bvd_id + year | 0 | bvd_id,
                                keepX = TRUE,
                                data = df.16.17)
summary(did.h1b.lob.2y.text.dhs)

# Column 7
did.h1b.lob.2y.text.wh <- felm(h1b_deny_rate ~
                                 lob_text_hs_h1b_visa_wh_eop_2017:post_2017
                               | bvd_id + year | 0 | bvd_id,
                               keepX = TRUE,
                               data = df.16.17)
summary(did.h1b.lob.2y.text.wh)

# collect results
models.2y <- list(did.h1b.lob.2y,
                  did.h1b.lob.2y.text,
                  did.h1b.lob.2y.text.congress,
                  did.h1b.lob.2y.text.uscis,
                  did.h1b.lob.2y.text.dol,
                  did.h1b.lob.2y.text.dhs,
                  did.h1b.lob.2y.text.wh)

# extract clustered SEs
cse.2y <- list(did.h1b.lob.2y$cse,
               did.h1b.lob.2y.text$cse,
               did.h1b.lob.2y.text.congress$cse,
               did.h1b.lob.2y.text.uscis$cse,
               did.h1b.lob.2y.text.dol$cse,
               did.h1b.lob.2y.text.dhs$cse,
               did.h1b.lob.2y.text.wh$cse)

# set var order
var.order.2y <- c("^lob_img_2017:post_2017$",
                  "^lob_text_hs_h1b_visa_2017:post_2017$",
                  "^lob_text_hs_h1b_visa_only_congress_2017:post_2017$",
                  "^lob_text_hs_h1b_visa_uscis_2017:post_2017$",
                  "^lob_text_hs_h1b_visa_dol_2017:post_2017$",
                  "^lob_text_hs_h1b_visa_dhs_2017:post_2017$",
                  "^lob_text_hs_h1b_visa_wh_eop_2017:post_2017$")

# set var label
var.label.2y <- str_replace_all(var.order.2y, "\\^", "")
var.label.2y <- str_replace_all(var.label.2y, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D8.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D8.tex"))
stargazer(models.2y,
          se = cse.2y,
          type = "text",
          #type = "latex",
          label = "tb:2y",
          font.size = "tiny",
          single.row = FALSE,
          title = "DiD Regression Results: Two Periods, 2016--2017",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.2y,
          covariate.labels = replaceVarName(var.vec = var.label.2y,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 7)),
                           c("Fixed Effects: Year",
                             rep("Yes", 7)),
                           c("Group Size: Firms",
                             formatC(length(levels(did.h1b.lob.2y$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.congress$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.uscis$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.dol$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.dhs$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.wh$fe$bvd_id)), big.mark = ",")),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.lob.2y$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.congress$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.uscis$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.dol$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.dhs$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2y.text.wh$fe$year)), big.mark = ","))),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed-effects subsume the constitutive terms of the interactions. Column (1) estimates the effect of immigration lobbying in 2017 ignoring specific lobbying issues and target venues. Column (2) estimates the effect when only considering 2017 immigration lobbying that specifies ``Skilled''/``H-1B''/``Visa'' in reports. Column (3)--(7) further restricts the treatment condition to only firms that targeted specific government agencies."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Table D.11: DiD Regression Results: Two-Way Clustered Standard Errors -------
# Column 1 
did.h1b.lob.1.twoway <- felm(h1b_deny_rate ~ 
                               lob_img_2017:post_2017
                             | bvd_id + year | 0 | bvd_id + year, 
                             keepX = TRUE,
                             data = df)
summary(did.h1b.lob.1.twoway)  

# Column 2
did.h1b.lob.2.ave.twoway <- felm(h1b_deny_rate ~
                                   lob_text_hs_h1b_visa_2017:post_2017
                                 | bvd_id + year | 0 | bvd_id + year,
                                 keepX = TRUE,
                                 data = df)
summary(did.h1b.lob.2.ave.twoway)

# Column 3 
did.h1b.lob.2.uscis.twoway <- felm(h1b_deny_rate ~
                                     lob_text_hs_h1b_visa_uscis_2017:post_2017
                                   | bvd_id + year | 0 | bvd_id + year,
                                   keepX = TRUE,
                                   data = df)
summary(did.h1b.lob.2.uscis.twoway)

# Column 4
did.h1b.lob.2.dol.twoway <- felm(h1b_deny_rate ~
                                   lob_text_hs_h1b_visa_dol_2017:post_2017
                                 | bvd_id + year | 0 | bvd_id + year,
                                 keepX = TRUE,
                                 data = df)
summary(did.h1b.lob.2.dol.twoway)

# Column 5
did.h1b.lob.2.dhs.twoway <- felm(h1b_deny_rate ~
                                   lob_text_hs_h1b_visa_dhs_2017:post_2017
                                 | bvd_id + year | 0 | bvd_id + year,
                                 keepX = TRUE,
                                 data = df)
summary(did.h1b.lob.2.dhs.twoway)

# Column 6
did.h1b.lob.2.wh.twoway <- felm(h1b_deny_rate ~
                                  lob_text_hs_h1b_visa_wh_eop_2017:post_2017
                                | bvd_id + year | 0 | bvd_id + year,
                                keepX = TRUE,
                                data = df)
summary(did.h1b.lob.2.wh.twoway)

# collect results
models.cse.twoway <- list(did.h1b.lob.1.twoway,
                          did.h1b.lob.2.ave.twoway, 
                          did.h1b.lob.2.uscis.twoway,
                          did.h1b.lob.2.dol.twoway,
                          did.h1b.lob.2.dhs.twoway,
                          did.h1b.lob.2.wh.twoway)

# extract clustered SEs
cse.twoway <- list(did.h1b.lob.1.twoway$cse,
                   did.h1b.lob.2.ave.twoway$cse,
                   did.h1b.lob.2.uscis.twoway$cse,
                   did.h1b.lob.2.dol.twoway$cse,
                   did.h1b.lob.2.dhs.twoway$cse,
                   did.h1b.lob.2.wh.twoway$cse)

# set var order
var.order.twoway <- c("^lob_img_2017:post_2017$",
                      "^lob_text_hs_h1b_visa_2017:post_2017$",
                      "^lob_text_hs_h1b_visa_uscis_2017:post_2017$",
                      "^lob_text_hs_h1b_visa_dol_2017:post_2017$",
                      "^lob_text_hs_h1b_visa_dhs_2017:post_2017$",
                      "^lob_text_hs_h1b_visa_wh_eop_2017:post_2017$")

# set var label
var.label.twoway <- str_replace_all(var.order.twoway, "\\^", "")
var.label.twoway <- str_replace_all(var.label.twoway, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D11.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D11.tex"))
stargazer(models.cse.twoway,
          se = cse.twoway,
          type = "text",
          #type = "latex",
          label = "tb:cse-twoway",
          font.size = "tiny",
          single.row = FALSE,
          title = "DiD Regression Results: Two-Way Clustered Standard Errors",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.main,
          covariate.labels = replaceVarName(var.vec = var.label.twoway,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 6)),
                           c("Fixed Effects: Year",
                             rep("Yes", 6)),
                           c("Group Size: Firms",
                             formatC(length(levels(did.h1b.lob.1.twoway$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.ave.twoway$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.uscis.twoway$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dol.twoway$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dhs.twoway$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.wh.twoway$fe$bvd_id)), big.mark = ",")
                           ),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.lob.1.twoway$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.ave.twoway$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.uscis.twoway$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dol.twoway$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.dhs.twoway$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.2.wh.twoway$fe$year)), big.mark = ",")
                           )
          ),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by both firms and years in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed-effects subsume the constitutive terms of the interactions. Column (1) estimate the effect of immigration lobbying in 2017 ignoring specific lobbying issues and target venues. Column (2) estimates the effect when only considering 2017 immigration lobbying that specifies ``Skilled''/``H-1B''/``Visa'' in reports. Columns (3)--(6) further restricts the treatment condition to only firms that targeted specific government agencies."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Appendix Table D.12: DiD Regression Results: Excluding Large Outlier Firms ----
#
## IMPORTANT NOTE --------------------------------------------------------------
# Appendix Table D.12 uses Orbis' proprietary data on firm sales and employment. 
# To protect Orbis' proprietary data, this script loads resulting saved data on 
# outlier firms for these variables. I identify and extract outliers using the 
# code below.
#
# # load main dataset
# load(file = paste(MAIN_DIR, "data-merge-91-17.RData", sep = "/"))
# 
# # subset 2017 sales
# df.sales.17 <- data.merge %>% 
#   filter(year == 2017) %>%
#   mutate(ln_sales = log(sales + 1))
# 
# # identify outliers
# sales.outlier.vec <- boxplot.stats(df.sales.17$sales)$out
# sales.outlier.vec <- sales.outlier.vec[sales.outlier.vec > summary(df.sales.17$sales)["3rd Qu."]]
# sales.outlier.vec
# 
# sales.outlier.df <- df.sales.17 %>%
#   filter(sales %in% sales.outlier.vec)
# 
# # extract outliers
# sales.outlier.df$bvd_name
# sales.outlier.df$bvd_id
# n_distinct(sales.outlier.df$bvd_id)
# 
# sales.outlier.bvdid <- sales.outlier.df$bvd_id
# 
# # save
# save(sales.outlier.bvdid, 
#      file = paste(MAIN_DIR, "Appendix-Table-D12-sales-outliers.RData", sep = "/"))
# 
#
# # subset 2017 employment
# df.employ.17 <- data.merge %>%
#   filter(year == 2017) %>%
#   mutate(ln_employ = log(n_employ))
# 
# # identify outliers
# employ.outlier.vec <- boxplot.stats(df.employ.17$n_employ)$out
# employ.outlier.vec <- employ.outlier.vec[employ.outlier.vec > summary(df.employ.17$n_employ)["3rd Qu."]]
# employ.outlier.vec
# 
# employ.outlier.df <- df.employ.17 %>%
#   filter(n_employ %in% employ.outlier.vec)
# 
# # extract outliers
# employ.outlier.df$bvd_name
# employ.outlier.df$bvd_id
# n_distinct(employ.outlier.df$bvd_id)
# 
# employ.outliers.bvdid <- employ.outlier.df$bvd_id
# 
# # save
# save(employ.outliers.bvdid,
#      file = paste(MAIN_DIR, "Appendix-Table-D12-employ-outliers.RData", sep = "/"))

# 2017 Sales
# load saved data on sales outliers
load(paste(MAIN_DIR, "Appendix-Table-D12-sales-outliers.RData", sep = "/"))

# exclude outliers
df.sales.rm.outlier <- df %>%
  filter(!(bvd_id %in% sales.outlier.bvdid))

# Column 1
did.h1b.lob.sales.rm.outlier <- felm(h1b_deny_rate ~ lob_img_2017:post_2017
                                     | bvd_id + year | 0 | bvd_id, 
                                     keepX = TRUE,
                                     data = df.sales.rm.outlier)
summary(did.h1b.lob.sales.rm.outlier)  

# Column 2
did.h1b.lob.text.sales.rm.outlier <- felm(h1b_deny_rate ~ lob_text_hs_h1b_visa_2017:post_2017
                                          | bvd_id + year | 0 | bvd_id, 
                                          keepX = TRUE,
                                          data = df.sales.rm.outlier)
summary(did.h1b.lob.text.sales.rm.outlier)  


# 2017 Employment
# load saved data on employment outliers
load(paste(MAIN_DIR, "Appendix-Table-D12-employ-outliers.RData", sep = "/"))

# exclude outliers
df.employ.rm.outlier <- df %>%
  filter(!(bvd_id %in% employ.outliers.bvdid))

# Column 3
did.h1b.lob.employ.rm.outlier <- felm(h1b_deny_rate ~ lob_img_2017:post_2017
                                      | bvd_id + year | 0 | bvd_id, 
                                      keepX = TRUE,
                                      data = df.employ.rm.outlier)
summary(did.h1b.lob.employ.rm.outlier)  

# Column 4
did.h1b.lob.text.employ.rm.outlier <- felm(h1b_deny_rate ~ lob_text_hs_h1b_visa_2017:post_2017
                                           | bvd_id + year | 0 | bvd_id, 
                                           keepX = TRUE,
                                           data = df.employ.rm.outlier)
summary(did.h1b.lob.text.employ.rm.outlier)  


# 2017 IMG Lobby Expense
# subset
df.expense.17 <- data.merge %>% 
  filter(year == 2017) %>%
  filter(n_img_rep_year > 0) %>%
  mutate(ln_est_img_expense_fy = log(est_img_expense_fy))

# identify outliers
expense.outlier.vec <- boxplot.stats(df.expense.17$est_img_expense_fy)$out
expense.outlier.vec <- expense.outlier.vec[expense.outlier.vec > summary(df.expense.17$est_img_expense_fy)["3rd Qu."]]
expense.outlier.vec

expense.outlier.df <- df.expense.17 %>%
  filter(est_img_expense_fy %in% expense.outlier.vec)

# extract outliers
expense.outlier.df$bvd_name
expense.outlier.df$bvd_id
n_distinct(expense.outlier.df$bvd_id)

expense.outliers.bvdid <- expense.outlier.df$bvd_id

# drop outliers
df.expense.rm.outlier <- df %>%
  filter(!(bvd_id %in% expense.outlier.df$bvd_id))

# Column 5
did.h1b.lob.expense.rm.outlier <- felm(h1b_deny_rate ~ lob_img_2017:post_2017
                                       | bvd_id + year | 0 | bvd_id, 
                                       keepX = TRUE,
                                       data = df.expense.rm.outlier)
summary(did.h1b.lob.expense.rm.outlier) 

# Column 6
did.h1b.lob.text.expense.rm.outlier <- felm(h1b_deny_rate ~ lob_text_hs_h1b_visa_2017:post_2017
                                            | bvd_id + year | 0 | bvd_id, 
                                            keepX = TRUE,
                                            data = df.expense.rm.outlier)
summary(did.h1b.lob.text.expense.rm.outlier)

# collect results
models.exclude.large <- list(did.h1b.lob.sales.rm.outlier,
                             did.h1b.lob.text.sales.rm.outlier, 
                             did.h1b.lob.employ.rm.outlier,
                             did.h1b.lob.text.employ.rm.outlier,
                             did.h1b.lob.expense.rm.outlier,
                             did.h1b.lob.text.expense.rm.outlier)

# extract clustered SEs
cse.exclude.large <- list(did.h1b.lob.sales.rm.outlier$cse,
                          did.h1b.lob.text.sales.rm.outlier$cse, 
                          did.h1b.lob.employ.rm.outlier$cse,
                          did.h1b.lob.text.employ.rm.outlier$cse,
                          did.h1b.lob.expense.rm.outlier$cse,
                          did.h1b.lob.text.expense.rm.outlier$cse)

# set var order
var.order.exclude.large <- c("^lob_img_2017:post_2017$",
                             "^lob_text_hs_h1b_visa_2017:post_2017$")

# set var label
var.label.exclude.large <- str_replace_all(var.order.exclude.large, "\\^", "")
var.label.exclude.large <- str_replace_all(var.label.exclude.large, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D12.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D12.tex"))
stargazer(models.exclude.large,
          se = cse.exclude.large,
          type = "text",
          #type = "latex",
          label = "tb:robust-exclude-extremely-large",
          font.size = "tiny",
          single.row = FALSE,
          title = "DiD Regression Results: Excluding Large Outlier Firms",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.exclude.large,
          covariate.labels = replaceVarName(var.vec = var.label.exclude.large,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 6)),
                           c("Fixed Effects: Year",
                             rep("Yes", 6)),
                           c("Group Size: Firms",
                             formatC(length(levels(did.h1b.lob.sales.rm.outlier$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.text.sales.rm.outlier$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.employ.rm.outlier$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.text.employ.rm.outlier$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.expense.rm.outlier$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.text.expense.rm.outlier$fe$bvd_id)), big.mark = ",")),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.lob.sales.rm.outlier$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.text.sales.rm.outlier$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.employ.rm.outlier$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.text.employ.rm.outlier$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.expense.rm.outlier$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.text.expense.rm.outlier$fe$year)), big.mark = ","))),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed-effects subsume the constitutive terms of the interactions. Large outlier firms are identified using the interquartile range (IQR) criterion (Q3 + 1.5 $\times$ IQR) on 2017 sales (columns (1)--(2)), 2017 employment (columns (3)--(4)), and 2017 estimated immigration lobbying expenses (columns (5)--(6))."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Appendix Table D.13: DiD Regression Results: Controlling for Size -----------
#
## IMPORTANT NOTE --------------------------------------------------------------
# Appendix Table D.13 uses Orbis' proprietary data on firm size. To protect 
# Orbis' proprietary data, this script loads saved regression output data 
# from the code below.
#
# # load main dataset
# load(file = paste(MAIN_DIR, "data-merge-91-17.RData", sep = "/"))
# df <- data.merge
# 
# # Column 1
# did.h1b.size <- felm(h1b_deny_rate ~
#                         lob_img_2017 +
#                         lob_img_2017:post_2017 +
#                         size
#                       | naics_core + year | 0 | bvd_id,
#                       #keepX = TRUE,
#                       keepX = FALSE,
#                       data = df)
# summary(did.h1b.size)
# 
# # save
# save(did.h1b.size,
#      file = paste(MAIN_DIR, "Appendix-Table-D13-c1-anonymous-output.RData", sep = "/"))
# 
# # Column 2
# did.h1b.public <- felm(h1b_deny_rate ~
#                          lob_img_2017 +
#                          lob_img_2017:post_2017 +
#                          public
#                        | naics_core + year | 0 | bvd_id,
#                        #keepX = TRUE,
#                        keepX = FALSE,
#                        data = df)
# summary(did.h1b.public)
# 
# # save
# save(did.h1b.public,
#      file = paste(MAIN_DIR, "Appendix-Table-D13-c2-anonymous-output.RData", sep = "/"))
# 
# 
# # Column 3
# did.h1b.size.public <- felm(h1b_deny_rate ~
#                               lob_img_2017 +
#                               lob_img_2017:post_2017 +
#                               size + public
#                             | naics_core + year | 0 | bvd_id,
#                             #keepX = TRUE,
#                             keepX = FALSE,
#                             data = df)
# 
# summary(did.h1b.size.public)
# 
# # save
# save(did.h1b.size.public,
#      file = paste(MAIN_DIR, "Appendix-Table-D13-c3-anonymous-output.RData", sep = "/"))

# load pre-saved anonymous regression output data
load(paste(MAIN_DIR, "Appendix-Table-D13-c1-anonymous-output.RData", sep = "/"))
load(paste(MAIN_DIR, "Appendix-Table-D13-c2-anonymous-output.RData", sep = "/"))
load(paste(MAIN_DIR, "Appendix-Table-D13-c3-anonymous-output.RData", sep = "/"))

# collect results
models.size <- list(did.h1b.size,
                    did.h1b.public,
                    did.h1b.size.public)

# extract clustered SEs
cse.size <- list(did.h1b.size$cse,
                 did.h1b.public$cse,
                 did.h1b.size.public$cse)

# set var order
var.order.size <- c("^lob_img_2017$",
                    "^lob_img_2017:post_2017$",
                    "^sizeMedium$",
                    "^sizeLarge$",
                    "^sizeVery Large$",
                    "^public$")

# set var label
var.label.size <- str_replace_all(var.order.size, "\\^", "")
var.label.size <- str_replace_all(var.label.size, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D13.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D13.tex"))
stargazer(models.size,
          se = cse.size,
          type = "text",
          #type = "latex",
          label = "tb:size",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "DiD Regression Results: Controlling for Size",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.size,
          covariate.labels = replaceVarName(var.vec = var.label.size,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Year",
                             rep("Yes", 3)),
                           c("Fixed Effects: Industry",
                             rep("Yes", 3)),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.size$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.public$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.size.public$fe$year)), big.mark = ",")),
                           c("Group Size: Industries",
                             formatC(length(levels(did.h1b.size$fe$naics_core)), big.mark = ","),
                             formatC(length(levels(did.h1b.public$fe$naics_core)), big.mark = ","),
                             formatC(length(levels(did.h1b.size.public$fe$naics_core)), big.mark = ","))),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that year fixed effects subsume the time constitutive term of the interactions."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Appendix Table D.14: DiD Regression Results: Time Placebos ------------------
# Column 1
time.placebo.1 <- felm(h1b_deny_rate ~ lob_img_2017:post_2004
                       | bvd_id + year | 0 | bvd_id, 
                       keepX = TRUE,
                       data = df.pre)
summary(time.placebo.1)  

# Column 2
time.placebo.2 <- felm(h1b_deny_rate ~ 
                         lob_img_2017:year_1992 + 
                         lob_img_2017:year_1993 + 
                         lob_img_2017:year_1994 + 
                         lob_img_2017:year_1995 + 
                         lob_img_2017:year_1996 + 
                         lob_img_2017:year_1997 + 
                         lob_img_2017:year_1998 + 
                         lob_img_2017:year_1999 + 
                         lob_img_2017:year_2000 + 
                         lob_img_2017:year_2001 + 
                         lob_img_2017:year_2002 + 
                         lob_img_2017:year_2003 + 
                         lob_img_2017:year_2004 + 
                         lob_img_2017:year_2005 + 
                         lob_img_2017:year_2006 + 
                         lob_img_2017:year_2007 + 
                         lob_img_2017:year_2008 + 
                         lob_img_2017:year_2009 + 
                         lob_img_2017:year_2010 + 
                         lob_img_2017:year_2011 + 
                         lob_img_2017:year_2012 + 
                         lob_img_2017:year_2013 + 
                         lob_img_2017:year_2014 + 
                         lob_img_2017:year_2015 + 
                         #lob_img_2017:year_2016 + 
                         lob_img_2017:post_2017 
                       | bvd_id + year | 0 | bvd_id, 
                       keepX = TRUE,
                       data = df)
summary(time.placebo.2)

# Column 3
time.placebo.3 <- felm(h1b_deny_rate ~ 
                         lob_img_2017:post_2017 + 
                         lob_img_2016:post_2017
                       | bvd_id + year | 0 | bvd_id, 
                       keepX = TRUE,
                       data = df)
summary(time.placebo.3) 

# collect results
models.p.time <- list(time.placebo.1,
                      time.placebo.2,
                      time.placebo.3)

# extract clustered SEs
cse.p.time <- list(time.placebo.1$cse,
                   time.placebo.2$cse,
                   time.placebo.3$cse)

# set var order
var.order.p.time <- c("^lob_img_2017:post_2004$",
                      "^lob_img_2017:post_2017$",
                      "^post_2017:lob_img_2016$",
                      "^lob_img_2017:year_1992$",
                      "^lob_img_2017:year_1993$",
                      "^lob_img_2017:year_1994$",
                      "^lob_img_2017:year_1995$",
                      "^lob_img_2017:year_1996$",
                      "^lob_img_2017:year_1997$",
                      "^lob_img_2017:year_1998$",
                      "^lob_img_2017:year_1999$",
                      "^lob_img_2017:year_2000$",
                      "^lob_img_2017:year_2001$",
                      "^lob_img_2017:year_2002$",
                      "^lob_img_2017:year_2003$",
                      "^lob_img_2017:year_2004$",
                      "^lob_img_2017:year_2005$",
                      "^lob_img_2017:year_2006$",
                      "^lob_img_2017:year_2007$",
                      "^lob_img_2017:year_2008$",
                      "^lob_img_2017:year_2009$",
                      "^lob_img_2017:year_2010$",
                      "^lob_img_2017:year_2011$",
                      "^lob_img_2017:year_2012$",
                      "^lob_img_2017:year_2013$",
                      "^lob_img_2017:year_2014$",
                      "^lob_img_2017:year_2015$")

# set var label
var.label.p.time <- str_replace_all(var.order.p.time, "\\^", "")
var.label.p.time <- str_replace_all(var.label.p.time, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D14.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D14.tex"))
stargazer(models.p.time,
          se = cse.p.time,
          type = "text",
          #type = "latex",
          label = "tb:time-placebo",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "DiD Regression Results: Time Placebos",
          dep.var.labels = "H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.p.time,
          covariate.labels = replaceVarName(var.vec = var.label.p.time,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 3)),
                           c("Fixed Effects: Year",
                             rep("Yes", 3)),
                           c("Group Size: Firms",
                             formatC(length(levels(time.placebo.1$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(time.placebo.2$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(time.placebo.3$fe$bvd_id)), big.mark = ",")),
                           c("Group Size: Years",
                             formatC(length(levels(time.placebo.1$fe$year)), big.mark = ","),
                             formatC(length(levels(time.placebo.2$fe$year)), big.mark = ","),
                             formatC(length(levels(time.placebo.3$fe$year)), big.mark = ",")),
                           c("Exclude Post-treatment Obs.",
                             "Yes", "No", "No")),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed effects subsume the constitutive terms of the interactions. Column (2) decomposes the treatment effect over time by interacting the treatment group indicator with time dummies for each year except for 2016 (the last pre-treatment period). Thus, coefficients in the model represent the estimated treatment effect in year $t$ compared to that of 2016 (the omitted baseline)."),
          notes.align = "l",
          notes.append = FALSE)
sink()


## Appendix Table D.15: DiD Regression Results: Placebo Treatments -------------
# Column 1
did.h1b.lob.tob <- felm(h1b_deny_rate ~ 
                          lob_only_tob_2017:post_2017
                        | bvd_id + year | 0 | bvd_id, 
                        keepX = TRUE,
                        data = df)
summary(did.h1b.lob.tob)  

# Column 2
did.h1b.lob.bev <- felm(h1b_deny_rate ~ 
                          lob_only_bev_2017:post_2017
                        | bvd_id + year | 0 | bvd_id, 
                        keepX = TRUE,
                        data = df)
summary(did.h1b.lob.bev)

# Column 3
did.h1b.lob.cdt <- felm(h1b_deny_rate ~ 
                          lob_only_cdt_2017:post_2017
                        | bvd_id + year | 0 | bvd_id, 
                        keepX = TRUE,
                        data = df)
summary(did.h1b.lob.cdt)

# collect results models
models.p.treat <- list(did.h1b.lob.tob,
                       did.h1b.lob.bev,
                       did.h1b.lob.cdt)
# extract clustered SEs
cse.p.treat <- list(did.h1b.lob.tob$cse,
                    did.h1b.lob.bev$cse,
                    did.h1b.lob.cdt$cse)

# set var order
var.order.p.treat <- c("^lob_only_tob_2017:post_2017$",
                       "^lob_only_bev_2017:post_2017$",
                       "^lob_only_cdt_2017:post_2017$")

# set var label
var.label.p.treat <- str_replace_all(var.order.p.treat, "\\^", "")
var.label.p.treat <- str_replace_all(var.label.p.treat, "\\$", "")

# save
sink(file.path(MAIN_DIR, "Appendix-Table-D15.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D15.tex"))
stargazer(models.p.treat,
          se = cse.p.treat,
          type = "text",
          #type = "latex",
          label = "tb:treat-placebo",
          font.size = "scriptsize",
          single.row = FALSE,
          title = "DiD Regression Results: Placebo Treatments",
          dep.var.labels = "1992--2017 H-1B Denial Rates",
          model.numbers = TRUE,
          order = var.order.p.treat,
          covariate.labels = replaceVarName(var.vec = var.label.p.treat,
                                            var.df = var.df),
          add.lines = list(c("Fixed Effects: Firm (BvD ID)",
                             rep("Yes", 3)),
                           c("Fixed Effects: Year",
                             rep("Yes", 3)),
                           c("Group Size: Firms",
                             formatC(length(levels(did.h1b.lob.tob$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.bev$fe$bvd_id)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.cdt$fe$bvd_id)), big.mark = ",")),
                           c("Group Size: Years",
                             formatC(length(levels(did.h1b.lob.tob$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.bev$fe$year)), big.mark = ","),
                             formatC(length(levels(did.h1b.lob.cdt$fe$year)), big.mark = ","))),
          omit.stat = c("res.dev",
                        "ser"),
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          #notes = c("Standard errors clustered by firms in parentheses. $^{*}$p$<$0.05; $^{***}$p$<$0.01; $^{***}$p$<$0.001. Note that firm and year fixed effects subsume the constitutive terms of the interactions."),
          notes.align = "l",
          notes.append = FALSE)
sink()

